In [18]:
import pandas as pd
import numpy as np

import plotly.express as px

from IPython.display import display, HTML
In [2]:
# Read the raw listings export; the next cell works on a copy of this frame.
listings_path = 'listings.csv'
imported_df = pd.read_csv(listings_path)
In [3]:
# Work on an independent (deep) copy so the freshly loaded frame stays untouched.
df = imported_df.copy(deep=True)
In [4]:
# Summarise the working frame: column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16055 entries, 0 to 16054
Data columns (total 18 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              16055 non-null  int64  
 1   name                            16045 non-null  object 
 2   host_id                         16055 non-null  int64  
 3   host_name                       16014 non-null  object 
 4   neighbourhood_group             16055 non-null  object 
 5   neighbourhood                   16055 non-null  object 
 6   latitude                        16055 non-null  float64
 7   longitude                       16055 non-null  float64
 8   room_type                       16055 non-null  object 
 9   price                           16055 non-null  int64  
 10  minimum_nights                  16055 non-null  int64  
 11  number_of_reviews               16055 non-null  int64  
 12  last_review                     11772 non-null  object 
 13  reviews_per_month               11772 non-null  float64
 14  calculated_host_listings_count  16055 non-null  int64  
 15  availability_365                16055 non-null  int64  
 16  number_of_reviews_ltm           16055 non-null  int64  
 17  license                         10277 non-null  object 
dtypes: float64(3), int64(8), object(7)
memory usage: 2.2+ MB

Show the number of missing values per column:

In [5]:
# Count the missing (NaN) entries in each column.
df.isna().sum()
Out[5]:
id                                   0
name                                10
host_id                              0
host_name                           41
neighbourhood_group                  0
neighbourhood                        0
latitude                             0
longitude                            0
room_type                            0
price                                0
minimum_nights                       0
number_of_reviews                    0
last_review                       4283
reviews_per_month                 4283
calculated_host_listings_count       0
availability_365                     0
number_of_reviews_ltm                0
license                           5778
dtype: int64
In [6]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
Out[6]:
id host_id latitude longitude price minimum_nights number_of_reviews reviews_per_month calculated_host_listings_count availability_365 number_of_reviews_ltm
count 1.605500e+04 1.605500e+04 16055.000000 16055.000000 16055.000000 16055.000000 16055.000000 11772.000000 16055.000000 16055.000000 16055.000000
mean 2.822959e+07 1.173823e+08 41.391995 2.167132 112.467331 12.970103 34.153348 1.164569 17.319028 164.437745 3.762940
std 1.594047e+07 1.232569e+08 0.014416 0.017599 325.455790 31.696885 67.383300 1.439324 33.352697 136.654295 8.811417
min 1.867400e+04 3.073000e+03 41.333420 2.091590 0.000000 1.000000 0.000000 0.010000 1.000000 0.000000 0.000000
25% 1.504661e+07 8.813134e+06 41.380800 2.157060 40.000000 1.000000 0.000000 0.180000 1.000000 8.000000 0.000000
50% 2.956314e+07 5.913604e+07 41.389450 2.168150 70.000000 3.000000 5.000000 0.700000 3.000000 153.000000 0.000000
75% 4.231742e+07 2.131051e+08 41.401520 2.177390 125.000000 30.000000 35.000000 1.670000 16.000000 306.000000 4.000000
max 5.270268e+07 4.257934e+08 41.461930 2.229670 9999.000000 1124.000000 862.000000 27.000000 182.000000 365.000000 283.000000
In [7]:
# Preview the first five rows of the working frame.
df.head()
Out[7]:
id name host_id host_name neighbourhood_group neighbourhood latitude longitude room_type price minimum_nights number_of_reviews last_review reviews_per_month calculated_host_listings_count availability_365 number_of_reviews_ltm license
0 18674 Huge flat for 8 people close to Sagrada Familia 71615 Mireia And Maria Eixample la Sagrada Família 41.40420 2.17306 Entire home/apt 121 1 21 2019-10-11 0.23 19 47 0 HUTB-002062
1 23197 Forum CCIB DeLuxe★Spacious &Elegant★Large Balcony 90417 Etain (Marnie) Sant Martí el Besòs i el Maresme 41.41291 2.22063 Entire home/apt 220 4 52 2019-12-15 0.74 2 86 0 HUTB-005057
2 32711 Sagrada Familia area - Còrsega 1 135703 Nick Gràcia el Camp d'en Grassot i Gràcia Nova 41.40566 2.17015 Entire home/apt 144 2 63 2019-09-06 0.60 3 85 0 HUTB-001722
3 34981 VIDRE HOME PLAZA REAL on LAS RAMBLAS 73163 Andres Ciutat Vella el Barri Gòtic 41.37978 2.17623 Entire home/apt 181 4 156 2020-03-11 1.55 2 136 7 HUTB-001506
4 35379 Double 04 CasanovaRooms Barcelona 152232 Pablo Eixample l'Antiga Esquerra de l'Eixample 41.39036 2.15274 Private room 41 2 358 2021-07-21 4.01 4 193 41 Exempt
In [8]:
# Plot every listing as a single-colour point on an OpenStreetMap basemap.
# `px` comes from the notebook's import cell, so plotly.express is not
# re-imported here (all imports live in one place).
fig = px.scatter_mapbox(
    df,
    lat="latitude",
    lon="longitude",
    color_discrete_sequence=["blue"],
    zoom=12.5,
    height=800,
    opacity=1,
    title='Airbnb properties in Barcelona',
)
# Single layout update: basemap style plus tight margins
# (the 30px top margin leaves room for the title).
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
)
fig.show()
In [9]:
# Same listing map as before, this time with one colour per room type.
map_options = dict(
    lat="latitude",
    lon="longitude",
    color='room_type',
    zoom=12.5,
    height=800,
    opacity=1,
    title='Airbnb properties in Barcelona',
)
fig = px.scatter_mapbox(df, **map_options)
# Basemap style and tight margins applied in one layout update.
fig.update_layout(
    mapbox_style="open-street-map",
    margin=dict(r=0, t=30, l=0, b=0),
)
fig.show()
In [10]:
# Build three parallel lists describing the room-type breakdown of the
# neighbourhoods with the most listings:
#   labels  - room type names
#   vals    - number of listings of that room type in the neighbourhood
#   parents - neighbourhood name, repeated once per room type
# The lists are initialised up front so they exist even if `df` has no rows.
TOP_N_DISTRICTS = 5

labels, vals, parents = [], [], []
for district in df.neighbourhood.value_counts().index[:TOP_N_DISTRICTS]:
    # value_counts() sorts by descending count, so within each neighbourhood
    # the most common room type comes first.
    room_type_counts = df.loc[df.neighbourhood == district, 'room_type'].value_counts()

    labels.extend(room_type_counts.index.to_list())
    vals.extend(room_type_counts.to_list())
    parents.extend([district] * len(room_type_counts))
In [11]:
# Assemble the parallel lists into a tidy frame: one row per
# (neighbourhood, room type) pair.
# Building from a dict of columns, rather than transposing a frame of rows,
# lets pandas infer a dtype per column, so `eff` is numeric instead of object.
df_top_district = pd.DataFrame({
    'type': labels,        # room type name
    'eff': vals,           # number of listings of that type in the neighbourhood
    'district': parents,   # neighbourhood name
})
df_top_district
Out[11]:
type eff district
0 Entire home/apt 1285 la Dreta de l'Eixample
1 Private room 599 la Dreta de l'Eixample
2 Hotel room 70 la Dreta de l'Eixample
3 Shared room 41 la Dreta de l'Eixample
4 Private room 653 el Raval
5 Entire home/apt 569 el Raval
6 Hotel room 9 el Raval
7 Shared room 8 el Raval
8 Entire home/apt 551 el Barri Gòtic
9 Private room 529 el Barri Gòtic
10 Shared room 13 el Barri Gòtic
11 Hotel room 1 el Barri Gòtic
12 Entire home/apt 534 Sant Pere, Santa Caterina i la Ribera
13 Private room 405 Sant Pere, Santa Caterina i la Ribera
14 Hotel room 18 Sant Pere, Santa Caterina i la Ribera
15 Shared room 11 Sant Pere, Santa Caterina i la Ribera
16 Entire home/apt 631 la Sagrada Família
17 Private room 323 la Sagrada Família
18 Hotel room 5 la Sagrada Família
19 Shared room 4 la Sagrada Família
In [12]:
# Sunburst of listing counts, nested by district and then by room type.
fig = px.sunburst(df_top_district, path=['district', 'type'], values='eff')
# Tight margins and the title are set in a single layout update
# (see https://plotly.com/python/creating-and-updating-figures/).
fig.update_layout(
    margin=dict(t=30, l=0, r=0, b=0),
    title_text="Room number per district and room type",
)
fig.show()
In [13]:
# Share (in %) of each room type among all listings of its neighbourhood.
# The per-neighbourhood listing totals are computed once and looked up by
# name, instead of re-filtering the full listings frame for every row.
listings_per_district = df['neighbourhood'].value_counts()
df_top_district['percent'] = (
    100
    * df_top_district['eff'].astype(float)   # cast guards against an object-dtype `eff`
    / df_top_district['district'].map(listings_per_district)
)
df_top_district
Out[13]:
type eff district percent
0 Entire home/apt 1285 la Dreta de l'Eixample 64.411028
1 Private room 599 la Dreta de l'Eixample 30.025063
2 Hotel room 70 la Dreta de l'Eixample 3.508772
3 Shared room 41 la Dreta de l'Eixample 2.055138
4 Private room 653 el Raval 52.703793
5 Entire home/apt 569 el Raval 45.924132
6 Hotel room 9 el Raval 0.726392
7 Shared room 8 el Raval 0.645682
8 Entire home/apt 551 el Barri Gòtic 50.365631
9 Private room 529 el Barri Gòtic 48.354662
10 Shared room 13 el Barri Gòtic 1.188300
11 Hotel room 1 el Barri Gòtic 0.091408
12 Entire home/apt 534 Sant Pere, Santa Caterina i la Ribera 55.165289
13 Private room 405 Sant Pere, Santa Caterina i la Ribera 41.838843
14 Hotel room 18 Sant Pere, Santa Caterina i la Ribera 1.859504
15 Shared room 11 Sant Pere, Santa Caterina i la Ribera 1.136364
16 Entire home/apt 631 la Sagrada Família 65.524403
17 Private room 323 la Sagrada Família 33.541018
18 Hotel room 5 la Sagrada Família 0.519211
19 Shared room 4 la Sagrada Família 0.415369
In [20]:
# Three faceted bar charts over the same summary frame; they share the
# figure width and the per-room-type colouring.
shared_bar_options = dict(width=300 * 5, color='type')

# Absolute listing counts: one facet per room type, districts on the x axis.
fig1 = px.bar(df_top_district, x='district', y='eff', facet_col='type', **shared_bar_options)
# Room-type shares: one facet per district, room types on the x axis.
fig2 = px.bar(df_top_district, x='type', y='percent', facet_col='district', **shared_bar_options)
# Room-type shares again, faceted by room type with districts on the x axis.
fig3 = px.bar(df_top_district, x='district', y='percent', facet_col='type', **shared_bar_options)

chart_titles = (
    (fig1, "Room number per type per district"),
    (fig2, "Room type proportion per district"),
    (fig3, "Room type proportion comparison per district"),
)
for chart, chart_title in chart_titles:
    chart.update_layout(title_text=chart_title)
    # Blank out the x-axis titles.
    chart.update_xaxes(title_text="")

# Section heading, followed by the three charts in order.
display(HTML('<h2>Room type analysis the first 5 districts</h2>'))
for chart in (fig1, fig2, fig3):
    chart.show()

Room type analysis the first 5 districts